## Loading required package: lattice
## Loading required package: ggplot2
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.
## 
## gdata: read.xls support for 'XLSX' (Excel 2007+) files ENABLED.
## 
## Attaching package: 'gdata'
## The following object is masked from 'package:stats':
## 
##     nobs
## The following object is masked from 'package:utils':
## 
##     object.size
## The following object is masked from 'package:base':
## 
##     startsWith
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:gdata':
## 
##     combine
## Loading required package: car
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
## Loading required package: magrittr
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
summary(dataset)
##  Customer.Lifetime.Value        State      Response       Coverage   
##  Min.   : 1898           Arizona   :1703   No :7826   Basic   :5568  
##  1st Qu.: 3994           California:3150   Yes:1308   Extended:2742  
##  Median : 5780           Nevada    : 882              Premium : 824  
##  Mean   : 8005           Oregon    :2601                             
##  3rd Qu.: 8962           Washington: 798                             
##  Max.   :83325                                                       
##                                                                      
##                 Education         EmploymentStatus Gender  
##  Bachelor            :2748   Disabled     : 405    F:4658  
##  College             :2681   Employed     :5698    M:4476  
##  Doctor              : 342   Medical Leave: 432            
##  High School or Below:2622   Retired      : 282            
##  Master              : 741   Unemployed   :2317            
##                                                            
##                                                            
##      Income       Location.Code   Marital.Status Monthly.Premium.Auto
##  Min.   :    0   Rural   :1773   Divorced:1369   Min.   : 61.00      
##  1st Qu.:    0   Suburban:5779   Married :5298   1st Qu.: 68.00      
##  Median :33890   Urban   :1582   Single  :2467   Median : 83.00      
##  Mean   :37657                                   Mean   : 93.22      
##  3rd Qu.:62320                                   3rd Qu.:109.00      
##  Max.   :99981                                   Max.   :298.00      
##                                                                      
##  Months.Since.Last.Claim Months.Since.Policy.Inception
##  Min.   : 0.0            Min.   : 0.00                
##  1st Qu.: 6.0            1st Qu.:24.00                
##  Median :14.0            Median :48.00                
##  Mean   :15.1            Mean   :48.06                
##  3rd Qu.:23.0            3rd Qu.:71.00                
##  Max.   :35.0            Max.   :99.00                
##                                                       
##  Number.of.Open.Complaints Number.of.Policies         Policy.Type  
##  Min.   :0.0000            Min.   :1.000      Corporate Auto:1968  
##  1st Qu.:0.0000            1st Qu.:1.000      Personal Auto :6788  
##  Median :0.0000            Median :2.000      Special Auto  : 378  
##  Mean   :0.3844            Mean   :2.966                           
##  3rd Qu.:0.0000            3rd Qu.:4.000                           
##  Max.   :5.0000            Max.   :9.000                           
##                                                                    
##           Policy     Renew.Offer.Type     Sales.Channel 
##  Personal L3 :3426   Offer1:3752      Agent      :3477  
##  Personal L2 :2122   Offer2:2926      Branch     :2567  
##  Personal L1 :1240   Offer3:1432      Call Center:1765  
##  Corporate L3:1014   Offer4:1024      Web        :1325  
##  Corporate L2: 595                                      
##  Corporate L1: 359                                      
##  (Other)     : 378                                      
##  Total.Claim.Amount       Vehicle.Class   Vehicle.Size 
##  Min.   :   0.099   Four-Door Car:4621   Large  : 946  
##  1st Qu.: 272.258   Luxury Car   : 163   Medsize:6424  
##  Median : 383.945   Luxury SUV   : 184   Small  :1764  
##  Mean   : 434.089   Sports Car   : 484                 
##  3rd Qu.: 547.515   SUV          :1796                 
##  Max.   :2893.240   Two-Door Car :1886                 
## 
sapply(dataset, function(x) sum(is.na(x)))
##       Customer.Lifetime.Value                         State 
##                             0                             0 
##                      Response                      Coverage 
##                             0                             0 
##                     Education              EmploymentStatus 
##                             0                             0 
##                        Gender                        Income 
##                             0                             0 
##                 Location.Code                Marital.Status 
##                             0                             0 
##          Monthly.Premium.Auto       Months.Since.Last.Claim 
##                             0                             0 
## Months.Since.Policy.Inception     Number.of.Open.Complaints 
##                             0                             0 
##            Number.of.Policies                   Policy.Type 
##                             0                             0 
##                        Policy              Renew.Offer.Type 
##                             0                             0 
##                 Sales.Channel            Total.Claim.Amount 
##                             0                             0 
##                 Vehicle.Class                  Vehicle.Size 
##                             0                             0

#Treated Outliers

## Data Exploration

#Customer Lifetime Value is dispropotionately high for those with 
#just one policy and similar in distribution for others

dataset$Number.of.Policies <- as.factor(ifelse(dataset$Number.of.Policies>1, ifelse(dataset$Number.of.Policies>3, ">3", "2-3"), "<2"))
a = ggplot(dataset, aes(x = Customer.Lifetime.Value))

a + geom_density(aes(y = ..count..,color = State))

#California and Oregon have highest number of customer. Most relevant levels

a + geom_density(aes(y = ..count..,color = Response))

#Most customers respond "No"

a + geom_density(aes(y = ..count..,color = Coverage))

#Basic coverage most relevant, followed by extended, then Premium

a + geom_density(aes(y = ..count..,color = Education))

#Bachelors, College and HS educated customers most relevant. 
#Masters and Doctors less so

a + geom_density(aes(y = ..count..,color = EmploymentStatus))

#Employed and Unemployed most relevant. Rest less so

a + geom_density(aes(y = ..count..,color = Gender))

#No significant variation with factors

a + geom_density(aes(y = ..count..,color = Location.Code))

#Suburban customers are most relevant.

a + geom_density(aes(y = ..count..,color = Marital.Status))

#Married customers are most relevat. Followed by Single and Divorced. 

a + geom_density(aes(y = ..count..,color = Policy.Type))

#Personal Auto is most relevant. Special Auto least

a + geom_density(aes(y = ..count..,color = Policy))

#Same as above. Personal L3 is most relevant

a + geom_density(aes(y = ..count..,color = Renew.Offer.Type))

#Offer 1 and 2 are most relevant. 

a + geom_density(aes(y = ..count..,color = Sales.Channel))

#Agent sales is most relevant. Followed by Branch, then CallCenter and Web

a + geom_density(aes(y = ..count..,color = Vehicle.Class))

#4-Door cars are most relevant

a + geom_density(aes(y = ..count..,color = Vehicle.Size))

#Medsize vehicles are most relevant

a + geom_density(aes(y = ..count..,color = Number.of.Policies))

#Low value customers have just 1 policy. High value have 2-3. Mid-Value Customers have >3
#Create Test and Train data

set.seed(100)
Train1 = createDataPartition(dataset[,1], p=0.7, list = FALSE)

trainData = dataset[Train1,]
testData = dataset[-Train1,]
model = step(lm(Customer.Lifetime.Value~., data = trainData), direction = "both")
## Start:  AIC=72463.69
## Customer.Lifetime.Value ~ State + Response + Coverage + Education + 
##     EmploymentStatus + Gender + Income + Location.Code + Marital.Status + 
##     Monthly.Premium.Auto + Months.Since.Last.Claim + Months.Since.Policy.Inception + 
##     Number.of.Open.Complaints + Number.of.Policies + Policy.Type + 
##     Policy + Renew.Offer.Type + Sales.Channel + Total.Claim.Amount + 
##     Vehicle.Class + Vehicle.Size
## 
## 
## Step:  AIC=72463.69
## Customer.Lifetime.Value ~ State + Response + Coverage + Education + 
##     EmploymentStatus + Gender + Income + Location.Code + Marital.Status + 
##     Monthly.Premium.Auto + Months.Since.Last.Claim + Months.Since.Policy.Inception + 
##     Number.of.Open.Complaints + Number.of.Policies + Policy + 
##     Renew.Offer.Type + Sales.Channel + Total.Claim.Amount + Vehicle.Class + 
##     Vehicle.Size
## 
##                                 Df  Sum of Sq        RSS   AIC
## - Policy                         8 1.0041e+07 7.6591e+09 72454
## - Vehicle.Class                  3 1.5668e+06 7.6506e+09 72459
## - State                          4 5.4782e+06 7.6546e+09 72459
## - Sales.Channel                  3 3.4035e+06 7.6525e+09 72460
## - Education                      4 7.4454e+06 7.6565e+09 72461
## - Renew.Offer.Type               3 4.7181e+06 7.6538e+09 72461
## - Location.Code                  2 1.7248e+06 7.6508e+09 72461
## - Total.Claim.Amount             1 1.6308e+06 7.6507e+09 72463
## - Months.Since.Last.Claim        1 2.9448e+06 7.6520e+09 72464
## <none>                                        7.6491e+09 72464
## - Income                         1 3.2754e+06 7.6523e+09 72464
## - Gender                         1 3.4364e+06 7.6525e+09 72464
## - Vehicle.Size                   2 7.7646e+06 7.6568e+09 72465
## - Response                       1 9.0154e+06 7.6581e+09 72468
## - Coverage                       2 1.5242e+07 7.6643e+09 72470
## - Months.Since.Policy.Inception  1 1.6260e+07 7.6653e+09 72472
## - Marital.Status                 2 2.1758e+07 7.6708e+09 72474
## - Number.of.Open.Complaints      1 3.6359e+07 7.6854e+09 72486
## - EmploymentStatus               4 6.5316e+07 7.7144e+09 72499
## - Monthly.Premium.Auto           1 4.6323e+08 8.1123e+09 72761
## - Number.of.Policies             2 2.3946e+10 3.1595e+10 79677
## 
## Step:  AIC=72454.37
## Customer.Lifetime.Value ~ State + Response + Coverage + Education + 
##     EmploymentStatus + Gender + Income + Location.Code + Marital.Status + 
##     Monthly.Premium.Auto + Months.Since.Last.Claim + Months.Since.Policy.Inception + 
##     Number.of.Open.Complaints + Number.of.Policies + Renew.Offer.Type + 
##     Sales.Channel + Total.Claim.Amount + Vehicle.Class + Vehicle.Size
## 
##                                 Df  Sum of Sq        RSS   AIC
## - Vehicle.Class                  3 1.3873e+06 7.6605e+09 72449
## - State                          4 5.7719e+06 7.6649e+09 72450
## - Sales.Channel                  3 3.4378e+06 7.6626e+09 72451
## - Renew.Offer.Type               3 4.5042e+06 7.6636e+09 72451
## - Education                      4 7.6361e+06 7.6667e+09 72451
## - Location.Code                  2 1.8950e+06 7.6610e+09 72452
## - Total.Claim.Amount             1 1.7253e+06 7.6608e+09 72454
## <none>                                        7.6591e+09 72454
## - Months.Since.Last.Claim        1 3.0747e+06 7.6622e+09 72454
## - Income                         1 3.3096e+06 7.6624e+09 72455
## - Gender                         1 3.4345e+06 7.6625e+09 72455
## - Vehicle.Size                   2 7.5778e+06 7.6667e+09 72455
## + Policy.Type                    2 1.8366e+06 7.6573e+09 72457
## - Response                       1 9.2601e+06 7.6684e+09 72459
## - Coverage                       2 1.5890e+07 7.6750e+09 72461
## - Months.Since.Policy.Inception  1 1.6535e+07 7.6756e+09 72463
## + Policy                         8 1.0041e+07 7.6491e+09 72464
## - Marital.Status                 2 2.2096e+07 7.6812e+09 72465
## - Number.of.Open.Complaints      1 3.6454e+07 7.6956e+09 72477
## - EmploymentStatus               4 6.4492e+07 7.7236e+09 72489
## - Monthly.Premium.Auto           1 4.6240e+08 8.1215e+09 72751
## - Number.of.Policies             2 2.3955e+10 3.1614e+10 79664
## 
## Step:  AIC=72449.29
## Customer.Lifetime.Value ~ State + Response + Coverage + Education + 
##     EmploymentStatus + Gender + Income + Location.Code + Marital.Status + 
##     Monthly.Premium.Auto + Months.Since.Last.Claim + Months.Since.Policy.Inception + 
##     Number.of.Open.Complaints + Number.of.Policies + Renew.Offer.Type + 
##     Sales.Channel + Total.Claim.Amount + Vehicle.Size
## 
##                                 Df  Sum of Sq        RSS   AIC
## - State                          4 5.8099e+06 7.6663e+09 72445
## - Sales.Channel                  3 3.4426e+06 7.6639e+09 72446
## - Renew.Offer.Type               3 4.4794e+06 7.6650e+09 72446
## - Education                      4 7.6098e+06 7.6681e+09 72446
## - Location.Code                  2 1.9071e+06 7.6624e+09 72447
## - Total.Claim.Amount             1 1.7749e+06 7.6623e+09 72448
## <none>                                        7.6605e+09 72449
## - Months.Since.Last.Claim        1 3.0708e+06 7.6636e+09 72449
## - Gender                         1 3.3720e+06 7.6639e+09 72450
## - Income                         1 3.4603e+06 7.6640e+09 72450
## - Vehicle.Size                   2 7.4680e+06 7.6680e+09 72450
## + Policy.Type                    2 1.7656e+06 7.6587e+09 72452
## - Response                       1 9.2560e+06 7.6698e+09 72453
## + Vehicle.Class                  3 1.3873e+06 7.6591e+09 72454
## - Months.Since.Policy.Inception  1 1.6729e+07 7.6772e+09 72458
## + Policy                         8 9.8613e+06 7.6506e+09 72459
## - Marital.Status                 2 2.2073e+07 7.6826e+09 72460
## - Coverage                       2 2.7444e+07 7.6879e+09 72463
## - Number.of.Open.Complaints      1 3.6457e+07 7.6970e+09 72471
## - EmploymentStatus               4 6.4231e+07 7.7247e+09 72484
## - Monthly.Premium.Auto           1 3.2624e+09 1.0923e+10 74252
## - Number.of.Policies             2 2.3954e+10 3.1614e+10 79658
## 
## Step:  AIC=72445.15
## Customer.Lifetime.Value ~ Response + Coverage + Education + EmploymentStatus + 
##     Gender + Income + Location.Code + Marital.Status + Monthly.Premium.Auto + 
##     Months.Since.Last.Claim + Months.Since.Policy.Inception + 
##     Number.of.Open.Complaints + Number.of.Policies + Renew.Offer.Type + 
##     Sales.Channel + Total.Claim.Amount + Vehicle.Size
## 
##                                 Df  Sum of Sq        RSS   AIC
## - Sales.Channel                  3 3.4233e+06 7.6697e+09 72441
## - Renew.Offer.Type               3 4.5536e+06 7.6709e+09 72442
## - Education                      4 7.6579e+06 7.6740e+09 72442
## - Location.Code                  2 1.9257e+06 7.6682e+09 72442
## - Total.Claim.Amount             1 1.9371e+06 7.6682e+09 72444
## <none>                                        7.6663e+09 72445
## - Months.Since.Last.Claim        1 3.0692e+06 7.6694e+09 72445
## - Gender                         1 3.4511e+06 7.6698e+09 72445
## - Income                         1 3.4712e+06 7.6698e+09 72445
## - Vehicle.Size                   2 7.5226e+06 7.6738e+09 72446
## + Policy.Type                    2 1.8844e+06 7.6644e+09 72448
## + State                          4 5.8099e+06 7.6605e+09 72449
## - Response                       1 9.2926e+06 7.6756e+09 72449
## + Vehicle.Class                  3 1.4253e+06 7.6649e+09 72450
## - Months.Since.Policy.Inception  1 1.6610e+07 7.6829e+09 72454
## + Policy                         8 1.0154e+07 7.6562e+09 72454
## - Marital.Status                 2 2.2276e+07 7.6886e+09 72456
## - Coverage                       2 2.7441e+07 7.6938e+09 72459
## - Number.of.Open.Complaints      1 3.6304e+07 7.7026e+09 72467
## - EmploymentStatus               4 6.4356e+07 7.7307e+09 72480
## - Monthly.Premium.Auto           1 3.2574e+09 1.0924e+10 74245
## - Number.of.Policies             2 2.3955e+10 3.1621e+10 79651
## 
## Step:  AIC=72441.42
## Customer.Lifetime.Value ~ Response + Coverage + Education + EmploymentStatus + 
##     Gender + Income + Location.Code + Marital.Status + Monthly.Premium.Auto + 
##     Months.Since.Last.Claim + Months.Since.Policy.Inception + 
##     Number.of.Open.Complaints + Number.of.Policies + Renew.Offer.Type + 
##     Total.Claim.Amount + Vehicle.Size
## 
##                                 Df  Sum of Sq        RSS   AIC
## - Education                      4 7.6644e+06 7.6774e+09 72439
## - Renew.Offer.Type               3 4.7843e+06 7.6745e+09 72439
## - Location.Code                  2 1.9566e+06 7.6717e+09 72439
## - Total.Claim.Amount             1 1.9865e+06 7.6717e+09 72441
## <none>                                        7.6697e+09 72441
## - Months.Since.Last.Claim        1 3.0441e+06 7.6728e+09 72441
## - Income                         1 3.4271e+06 7.6732e+09 72442
## - Gender                         1 3.4993e+06 7.6732e+09 72442
## - Vehicle.Size                   2 7.6497e+06 7.6774e+09 72442
## + Policy.Type                    2 2.0379e+06 7.6677e+09 72444
## + Sales.Channel                  3 3.4233e+06 7.6663e+09 72445
## - Response                       1 8.9947e+06 7.6787e+09 72445
## + State                          4 5.7905e+06 7.6639e+09 72446
## + Vehicle.Class                  3 1.4334e+06 7.6683e+09 72446
## - Months.Since.Policy.Inception  1 1.6200e+07 7.6859e+09 72450
## + Policy                         8 1.0165e+07 7.6596e+09 72451
## - Marital.Status                 2 2.2219e+07 7.6920e+09 72452
## - Coverage                       2 2.7695e+07 7.6974e+09 72456
## - Number.of.Open.Complaints      1 3.6511e+07 7.7062e+09 72464
## - EmploymentStatus               4 6.4840e+07 7.7346e+09 72476
## - Monthly.Premium.Auto           1 3.2569e+09 1.0927e+10 74240
## - Number.of.Policies             2 2.3957e+10 3.1626e+10 79646
## 
## Step:  AIC=72438.5
## Customer.Lifetime.Value ~ Response + Coverage + EmploymentStatus + 
##     Gender + Income + Location.Code + Marital.Status + Monthly.Premium.Auto + 
##     Months.Since.Last.Claim + Months.Since.Policy.Inception + 
##     Number.of.Open.Complaints + Number.of.Policies + Renew.Offer.Type + 
##     Total.Claim.Amount + Vehicle.Size
## 
##                                 Df  Sum of Sq        RSS   AIC
## - Renew.Offer.Type               3 4.6009e+06 7.6820e+09 72436
## - Location.Code                  2 1.7688e+06 7.6792e+09 72436
## - Total.Claim.Amount             1 2.1381e+06 7.6795e+09 72438
## <none>                                        7.6774e+09 72439
## - Months.Since.Last.Claim        1 3.1116e+06 7.6805e+09 72439
## - Income                         1 3.3716e+06 7.6808e+09 72439
## - Gender                         1 3.4424e+06 7.6808e+09 72439
## - Vehicle.Size                   2 7.8655e+06 7.6853e+09 72440
## + Policy.Type                    2 2.1156e+06 7.6753e+09 72441
## + Education                      4 7.6644e+06 7.6697e+09 72441
## + Sales.Channel                  3 3.4298e+06 7.6740e+09 72442
## + State                          4 5.8331e+06 7.6716e+09 72443
## - Response                       1 9.3335e+06 7.6867e+09 72443
## + Vehicle.Class                  3 1.3988e+06 7.6760e+09 72444
## - Months.Since.Policy.Inception  1 1.6500e+07 7.6939e+09 72447
## + Policy                         8 1.0351e+07 7.6670e+09 72448
## - Marital.Status                 2 2.3181e+07 7.7006e+09 72450
## - Coverage                       2 2.7109e+07 7.7045e+09 72452
## - Number.of.Open.Complaints      1 3.6299e+07 7.7137e+09 72460
## - EmploymentStatus               4 6.4868e+07 7.7423e+09 72473
## - Monthly.Premium.Auto           1 3.2608e+09 1.0938e+10 74238
## - Number.of.Policies             2 2.4021e+10 3.1698e+10 79649
## 
## Step:  AIC=72435.55
## Customer.Lifetime.Value ~ Response + Coverage + EmploymentStatus + 
##     Gender + Income + Location.Code + Marital.Status + Monthly.Premium.Auto + 
##     Months.Since.Last.Claim + Months.Since.Policy.Inception + 
##     Number.of.Open.Complaints + Number.of.Policies + Total.Claim.Amount + 
##     Vehicle.Size
## 
##                                 Df  Sum of Sq        RSS   AIC
## - Location.Code                  2 1.7360e+06 7.6837e+09 72433
## - Total.Claim.Amount             1 1.9583e+06 7.6840e+09 72435
## - Income                         1 2.9924e+06 7.6850e+09 72436
## <none>                                        7.6820e+09 72436
## - Months.Since.Last.Claim        1 3.0446e+06 7.6850e+09 72436
## - Gender                         1 3.5435e+06 7.6855e+09 72436
## - Vehicle.Size                   2 7.5915e+06 7.6896e+09 72437
## + Policy.Type                    2 2.0816e+06 7.6799e+09 72438
## + Renew.Offer.Type               3 4.6009e+06 7.6774e+09 72439
## + Education                      4 7.4811e+06 7.6745e+09 72439
## + Sales.Channel                  3 3.6840e+06 7.6783e+09 72439
## - Response                       1 9.0126e+06 7.6910e+09 72440
## + State                          4 5.9100e+06 7.6761e+09 72440
## + Vehicle.Class                  3 1.3791e+06 7.6806e+09 72441
## - Months.Since.Policy.Inception  1 1.5797e+07 7.6978e+09 72444
## + Policy                         8 1.0153e+07 7.6718e+09 72445
## - Marital.Status                 2 2.2065e+07 7.7041e+09 72446
## - Coverage                       2 2.7254e+07 7.7093e+09 72450
## - Number.of.Open.Complaints      1 3.5197e+07 7.7172e+09 72457
## - EmploymentStatus               4 6.3869e+07 7.7459e+09 72470
## - Monthly.Premium.Auto           1 3.3096e+09 1.0992e+10 74256
## - Number.of.Policies             2 2.4741e+10 3.2423e+10 79758
## 
## Step:  AIC=72432.7
## Customer.Lifetime.Value ~ Response + Coverage + EmploymentStatus + 
##     Gender + Income + Marital.Status + Monthly.Premium.Auto + 
##     Months.Since.Last.Claim + Months.Since.Policy.Inception + 
##     Number.of.Open.Complaints + Number.of.Policies + Total.Claim.Amount + 
##     Vehicle.Size
## 
##                                 Df  Sum of Sq        RSS   AIC
## - Total.Claim.Amount             1 6.3751e+05 7.6844e+09 72431
## - Months.Since.Last.Claim        1 2.9557e+06 7.6867e+09 72433
## <none>                                        7.6837e+09 72433
## - Income                         1 3.6235e+06 7.6874e+09 72433
## - Gender                         1 3.7022e+06 7.6874e+09 72433
## - Vehicle.Size                   2 8.3326e+06 7.6921e+09 72434
## + Policy.Type                    2 2.1562e+06 7.6816e+09 72435
## + Location.Code                  2 1.7360e+06 7.6820e+09 72436
## + Renew.Offer.Type               3 4.5681e+06 7.6792e+09 72436
## + Education                      4 7.2930e+06 7.6764e+09 72436
## - Response                       1 8.2597e+06 7.6920e+09 72436
## + Sales.Channel                  3 3.7246e+06 7.6800e+09 72436
## + State                          4 5.9367e+06 7.6778e+09 72437
## + Vehicle.Class                  3 1.3893e+06 7.6823e+09 72438
## - Months.Since.Policy.Inception  1 1.5955e+07 7.6997e+09 72441
## + Policy                         8 1.0312e+07 7.6734e+09 72442
## - Marital.Status                 2 2.1545e+07 7.7053e+09 72443
## - Coverage                       2 2.6879e+07 7.7106e+09 72446
## - Number.of.Open.Complaints      1 3.5393e+07 7.7191e+09 72454
## - EmploymentStatus               4 6.4499e+07 7.7482e+09 72467
## - Monthly.Premium.Auto           1 4.4837e+09 1.2167e+10 74769
## - Number.of.Policies             2 2.4739e+10 3.2423e+10 79754
## 
## Step:  AIC=72431.12
## Customer.Lifetime.Value ~ Response + Coverage + EmploymentStatus + 
##     Gender + Income + Marital.Status + Monthly.Premium.Auto + 
##     Months.Since.Last.Claim + Months.Since.Policy.Inception + 
##     Number.of.Open.Complaints + Number.of.Policies + Vehicle.Size
## 
##                                 Df  Sum of Sq        RSS   AIC
## - Months.Since.Last.Claim        1 2.9838e+06 7.6874e+09 72431
## <none>                                        7.6844e+09 72431
## - Income                         1 3.2174e+06 7.6876e+09 72431
## - Gender                         1 3.4614e+06 7.6878e+09 72431
## - Vehicle.Size                   2 7.8152e+06 7.6922e+09 72432
## + Total.Claim.Amount             1 6.3751e+05 7.6837e+09 72433
## + Policy.Type                    2 2.1597e+06 7.6822e+09 72434
## + Education                      4 7.4794e+06 7.6769e+09 72434
## + Renew.Offer.Type               3 4.4394e+06 7.6799e+09 72434
## + Sales.Channel                  3 3.7303e+06 7.6806e+09 72435
## - Response                       1 8.5652e+06 7.6929e+09 72435
## + Location.Code                  2 4.1526e+05 7.6840e+09 72435
## + State                          4 6.0283e+06 7.6783e+09 72435
## + Vehicle.Class                  3 1.4433e+06 7.6829e+09 72436
## - Months.Since.Policy.Inception  1 1.6114e+07 7.7005e+09 72440
## + Policy                         8 1.0339e+07 7.6740e+09 72440
## - Marital.Status                 2 2.0935e+07 7.7053e+09 72441
## - Coverage                       2 2.7035e+07 7.7114e+09 72445
## - Number.of.Open.Complaints      1 3.5446e+07 7.7198e+09 72453
## - EmploymentStatus               4 6.3885e+07 7.7483e+09 72465
## - Monthly.Premium.Auto           1 5.4545e+09 1.3139e+10 75158
## - Number.of.Policies             2 2.4740e+10 3.2425e+10 79752
## 
## Step:  AIC=72431.1
## Customer.Lifetime.Value ~ Response + Coverage + EmploymentStatus + 
##     Gender + Income + Marital.Status + Monthly.Premium.Auto + 
##     Months.Since.Policy.Inception + Number.of.Open.Complaints + 
##     Number.of.Policies + Vehicle.Size
## 
##                                 Df  Sum of Sq        RSS   AIC
## <none>                                        7.6874e+09 72431
## + Months.Since.Last.Claim        1 2.9838e+06 7.6844e+09 72431
## - Income                         1 3.5107e+06 7.6909e+09 72431
## - Gender                         1 3.5340e+06 7.6909e+09 72431
## - Vehicle.Size                   2 7.7985e+06 7.6952e+09 72432
## + Total.Claim.Amount             1 6.6564e+05 7.6867e+09 72433
## + Policy.Type                    2 2.2362e+06 7.6851e+09 72434
## + Education                      4 7.5526e+06 7.6798e+09 72434
## + Renew.Offer.Type               3 4.3723e+06 7.6830e+09 72434
## + Sales.Channel                  3 3.6938e+06 7.6837e+09 72435
## + Location.Code                  2 4.1757e+05 7.6869e+09 72435
## - Response                       1 8.7344e+06 7.6961e+09 72435
## + State                          4 6.0297e+06 7.6813e+09 72435
## + Vehicle.Class                  3 1.4438e+06 7.6859e+09 72436
## - Months.Since.Policy.Inception  1 1.5357e+07 7.7027e+09 72439
## + Policy                         8 1.0484e+07 7.6769e+09 72440
## - Marital.Status                 2 2.0809e+07 7.7082e+09 72441
## - Coverage                       2 2.7092e+07 7.7144e+09 72445
## - Number.of.Open.Complaints      1 3.5663e+07 7.7230e+09 72453
## - EmploymentStatus               4 6.3326e+07 7.7507e+09 72465
## - Monthly.Premium.Auto           1 5.4593e+09 1.3147e+10 75159
## - Number.of.Policies             2 2.4738e+10 3.2425e+10 79751
summary(model)
## 
## Call:
## lm(formula = Customer.Lifetime.Value ~ Response + Coverage + 
##     EmploymentStatus + Gender + Income + Marital.Status + Monthly.Premium.Auto + 
##     Months.Since.Policy.Inception + Number.of.Open.Complaints + 
##     Number.of.Policies + Vehicle.Size, data = trainData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3231.0  -435.1    22.6   442.1  7059.1 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   -1.988e+03  1.361e+02 -14.611  < 2e-16 ***
## ResponseYes                    1.225e+02  5.105e+01   2.400 0.016437 *  
## CoverageExtended               1.349e+01  4.241e+01   0.318 0.750398    
## CoveragePremium                3.338e+02  8.084e+01   4.129 3.71e-05 ***
## EmploymentStatusEmployed       3.352e+02  9.244e+01   3.626 0.000291 ***
## EmploymentStatusMedical Leave  8.404e+01  1.152e+02   0.729 0.465882    
## EmploymentStatusRetired       -4.023e+01  1.297e+02  -0.310 0.756411    
## EmploymentStatusUnemployed    -9.293e+01  9.393e+01  -0.989 0.322548    
## GenderM                       -5.294e+01  3.468e+01  -1.527 0.126938    
## Income                         1.467e-03  9.643e-04   1.521 0.128197    
## Marital.StatusMarried          6.934e+01  5.064e+01   1.369 0.170978    
## Marital.StatusSingle          -9.233e+01  5.894e+01  -1.566 0.117318    
## Monthly.Premium.Auto           6.088e+01  1.015e+00  59.998  < 2e-16 ***
## Months.Since.Policy.Inception -1.967e+00  6.181e-01  -3.182 0.001470 ** 
## Number.of.Open.Complaints     -9.071e+01  1.871e+01  -4.849 1.28e-06 ***
## Number.of.Policies>3           3.232e+03  4.241e+01  76.214  < 2e-16 ***
## Number.of.Policies2-3          5.193e+03  4.133e+01 125.638  < 2e-16 ***
## Vehicle.SizeMedsize           -6.071e+01  5.704e+01  -1.064 0.287204    
## Vehicle.SizeSmall             -1.415e+02  6.654e+01  -2.127 0.033481 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1231 on 5069 degrees of freedom
## Multiple R-squared:  0.8069, Adjusted R-squared:  0.8062 
## F-statistic:  1176 on 18 and 5069 DF,  p-value: < 2.2e-16
vif(model)
##                                   GVIF Df GVIF^(1/(2*Df))
## Response                      1.103689  1        1.050566
## Coverage                      1.338046  2        1.075518
## EmploymentStatus              3.393559  4        1.165016
## Gender                        1.008177  1        1.004080
## Income                        2.847800  1        1.687543
## Marital.Status                1.177034  2        1.041591
## Monthly.Premium.Auto          1.334899  1        1.155378
## Months.Since.Policy.Inception 1.002681  1        1.001340
## Number.of.Open.Complaints     1.004348  1        1.002171
## Number.of.Policies            1.010246  2        1.002552
## Vehicle.Size                  1.015671  2        1.003895
model = step(lm(Customer.Lifetime.Value~I(Coverage=="Premium")+
                  I(EmploymentStatus=="Employed")+
                      Monthly.Premium.Auto +
                      Months.Since.Policy.Inception+
                      Number.of.Open.Complaints+
                      I(Number.of.Policies == "<2")+
                      I(Number.of.Policies == "2-3")+
                      I(Number.of.Policies == ">3")
                  , data = trainData), direction = "both")
## Start:  AIC=72452.57
## Customer.Lifetime.Value ~ I(Coverage == "Premium") + I(EmploymentStatus == 
##     "Employed") + Monthly.Premium.Auto + Months.Since.Policy.Inception + 
##     Number.of.Open.Complaints + I(Number.of.Policies == "<2") + 
##     I(Number.of.Policies == "2-3") + I(Number.of.Policies == 
##     ">3")
## 
## 
## Step:  AIC=72452.57
## Customer.Lifetime.Value ~ I(Coverage == "Premium") + I(EmploymentStatus == 
##     "Employed") + Monthly.Premium.Auto + Months.Since.Policy.Inception + 
##     Number.of.Open.Complaints + I(Number.of.Policies == "<2") + 
##     I(Number.of.Policies == "2-3")
## 
##                                     Df  Sum of Sq        RSS   AIC
## <none>                                            7.7533e+09 72453
## - Months.Since.Policy.Inception      1   14467634 7.7678e+09 72460
## - I(Coverage == "Premium")           1   26797427 7.7801e+09 72468
## - Number.of.Open.Complaints          1   37345279 7.7907e+09 72475
## - I(EmploymentStatus == "Employed")  1  280078275 8.0334e+09 72631
## - I(Number.of.Policies == "2-3")     1 2997053531 1.0750e+10 74113
## - Monthly.Premium.Auto               1 6569810186 1.4323e+10 75573
## - I(Number.of.Policies == "<2")      1 8833377737 1.6587e+10 76320
summary(model)
## 
## Call:
## lm(formula = Customer.Lifetime.Value ~ I(Coverage == "Premium") + 
##     I(EmploymentStatus == "Employed") + Monthly.Premium.Auto + 
##     Months.Since.Policy.Inception + Number.of.Open.Complaints + 
##     I(Number.of.Policies == "<2") + I(Number.of.Policies == "2-3"), 
##     data = trainData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3336.1  -460.2    34.0   440.4  7045.5 
## 
## Coefficients:
##                                         Estimate Std. Error t value
## (Intercept)                            1078.3298    91.3251  11.808
## I(Coverage == "Premium")TRUE            325.2615    77.6244   4.190
## I(EmploymentStatus == "Employed")TRUE   492.7177    36.3723  13.547
## Monthly.Premium.Auto                     61.4341     0.9364  65.609
## Months.Since.Policy.Inception            -1.9074     0.6195  -3.079
## Number.of.Open.Complaints               -92.6752    18.7352  -4.947
## I(Number.of.Policies == "<2")TRUE     -3230.5683    42.4646 -76.077
## I(Number.of.Policies == "2-3")TRUE     1961.2182    44.2579  44.313
##                                       Pr(>|t|)    
## (Intercept)                            < 2e-16 ***
## I(Coverage == "Premium")TRUE          2.83e-05 ***
## I(EmploymentStatus == "Employed")TRUE  < 2e-16 ***
## Monthly.Premium.Auto                   < 2e-16 ***
## Months.Since.Policy.Inception          0.00209 ** 
## Number.of.Open.Complaints             7.80e-07 ***
## I(Number.of.Policies == "<2")TRUE      < 2e-16 ***
## I(Number.of.Policies == "2-3")TRUE     < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1235 on 5080 degrees of freedom
## Multiple R-squared:  0.8052, Adjusted R-squared:  0.8049 
## F-statistic:  3000 on 7 and 5080 DF,  p-value: < 2.2e-16
vif(model)
##          I(Coverage == "Premium") I(EmploymentStatus == "Employed") 
##                          1.128470                          1.003126 
##              Monthly.Premium.Auto     Months.Since.Policy.Inception 
##                          1.129449                          1.000720 
##         Number.of.Open.Complaints     I(Number.of.Policies == "<2") 
##                          1.001026                          1.423118 
##    I(Number.of.Policies == "2-3") 
##                          1.425878
par(mfrow=c(2,2))
plot(model)

dwtest(model)
## 
##  Durbin-Watson test
## 
## data:  model
## DW = 1.9952, p-value = 0.4326
## alternative hypothesis: true autocorrelation is greater than 0
RMSError = RMSE(model$fitted.values, trainData$Customer.Lifetime.Value, na.rm = TRUE)

MAPE = 100*sum(abs(model$residuals/trainData$Customer.Lifetime.Value))/nrow(trainData)

RMSError
## [1] 1234.441
MAPE
## [1] 13.86105

## [1] 0.8988717
## [1] 1242.833
## [1] 13.9844
#Model is a good fit

model
## 
## Call:
## lm(formula = Customer.Lifetime.Value ~ I(Coverage == "Premium") + 
##     I(EmploymentStatus == "Employed") + Monthly.Premium.Auto + 
##     Months.Since.Policy.Inception + Number.of.Open.Complaints + 
##     I(Number.of.Policies == "<2") + I(Number.of.Policies == "2-3"), 
##     data = trainData)
## 
## Coefficients:
##                           (Intercept)  
##                              1078.330  
##          I(Coverage == "Premium")TRUE  
##                               325.261  
## I(EmploymentStatus == "Employed")TRUE  
##                               492.718  
##                  Monthly.Premium.Auto  
##                                61.434  
##         Months.Since.Policy.Inception  
##                                -1.907  
##             Number.of.Open.Complaints  
##                               -92.675  
##     I(Number.of.Policies == "<2")TRUE  
##                             -3230.568  
##    I(Number.of.Policies == "2-3")TRUE  
##                              1961.218